Adam Mahood
2/8/2022
Applying a new techniqe to data from an old study
Feel free to give it a read! It’s my master’s thesis.
If you would like to follow along in R:
git clone https://github.com/admahood/ff_study.giteds_talk.Rmd in the top directory# for the actual analysis
library(tidyverse)
library(Hmsc)
require(snow)
# for the aesthetics
library(ggtext)
library(knitr)
library(kableExtra)
library(ggpubr) I will begin with some background, so people who want to follow along will be able to get caught up on all that stuff
They look the same but are they really the same?
For more information:
div_wide <- read.csv("Data/FF_All_plots.csv") %>%
dplyr::select(-SS1, -SS2,-Seedlings) %>%
mutate(Plot = substr(Image,1,9)) %>%
mutate(UNK9 = UNK2 + UNK9) %>% # UNK2 = UNK9!
mutate(LEPE2 = LEPE2 + LEPE2.1) %>%
dplyr::select(-UNK2, -Image, -LEPE2.1) %>%
dplyr::rename(CYMOP2 = CYMO,
BAPR5 = BAPR,
ERNA10 = ERNA,
CRYPT = CRYPTANTHASP,
ELCA13 = ELCA,
ACMI2 = ACMI) %>%
group_by(Plot) %>%
summarise_all(mean, na.rm=TRUE) %>%
dplyr::select(-ssMean, -LITTER, -BARE, -SHADOW,-Seedling_presence,
-UNCLEAR, -WOOD, -SCAT, -DUNG, -ROCK,-CRUST) %>%
dplyr::arrange(Plot) %>%
tibble::column_to_rownames("Plot")
ENV <- read.csv("Data/FF_All_plots.csv") %>%
dplyr::select(Image, ssMean, Seedlings, Seedling_presence, LITTER, BARE, ROCK,
CRUST, SHADOW, UNCLEAR, WOOD, SCAT, DUNG) %>%
mutate(plot = substr(Image, 1, 9)) %>%
dplyr::select(-Image) %>%
group_by(plot) %>%
summarise_all(mean, na.rm=TRUE) %>%
mutate(Seedling_presence = as.factor(ifelse(Seedling_presence >0, "present", "absent")))
env_data <- read.csv("Data/FF_plot_level.csv") %>%
dplyr::rename(plot = X)%>%
mutate(burned = as.factor(ifelse(FF == 0, "unburned", "burned")),
soil_carbon_gm2 = Soil_BD * Soil_OM * 100,
soil_nitrogen_gm2 = Soil_BD * Soil_TN * 100,
BRTE_carbon_gm2 = BRTE_mass/.9 * BRTE_TC/100,
BRTE_nitrogen_gm2 = BRTE_mass/.9 * BRTE_TN/100,
Soil_ag_stab = ENV$ssMean,
FF=as.factor(FF),
Seedling_presence = as.factor(Seedling_presence),
exotic_cover = EAF + EAG + EPF + EPG,
native_cover = NAF + NPF + NPG + shrub_cover_PQ,
adj_TSF = ifelse(is.na(TSF)==TRUE, 100, TSF),
block = substr(plot, 1, 3),
block = as.factor(ifelse(block == "F03", "F05", block))
) %>%
dplyr::select(FF,BRTE_TN, BRTE_TC, Soil_BD, Soil_OM, Soil_TN,plot,
netMineralization, Elevation, folded_aspect, AUM_acre,
soil_carbon_gm2, soil_nitrogen_gm2, Soil_ag_stab,burned
) %>%
left_join(ENV) %>%
arrange(plot) Y <- div_wide %>%
as.matrix
Y[Y>0] <-1
# We can do abundance by saying Y <- Y/100
prevalence<- colSums(Y) %>%
as_tibble(rownames = "Species") %>%
dplyr::rename(prevalence = value) %>%
arrange(desc(prevalence))
XData <- env_data %>%
mutate(site = str_sub(plot, 1,3)) %>%
dplyr::select(-starts_with(c("BRTE", "Soil_", "net", "soil_")))%>%
mutate(plot = as.factor(plot)) %>%
tibble::column_to_rownames("plot")%>%
mutate_if(is.character, as.factor) %>%
as.data.frame() XFormula <- ~ Elevation+
folded_aspect +
FF +
LITTER +
AUM_acre
# insert traits and phylogeny here
studyDesign <- data.frame(site = as.factor(XData$site))
rL <- HmscRandomLevel(units = studyDesign$site)
mod = Hmsc(Y = Y, XData = XData, XFormula = XFormula, distr="probit",
#TrData = traits,
#TrFormula = tr_form,
studyDesign = studyDesign,
ranLevels = list("site" = rL))nChains = 2
test.run = FALSE
if (test.run){
#with this option, the vignette evaluates in ca. 1 minute in adam's laptop
thin = 10
samples = 100
transient = ceiling(thin*samples*.5)
}else{
# with a spatial random effect, evaluates in 20 minutes on adam's laptop
thin = 10
samples = 1000
transient = ceiling(thin*samples*.5)
}This stuff takes a long time! It’s a really good idea to keep track of how long things take, and save your results.
t0 <- Sys.time()
hmsc_file <- "Data/hmsc/hmsc_probit_test.Rda"
dir.create("Data/hmsc")
if(!file.exists(hmsc_file)){
m = sampleMcmc(mod, thin = thin,
samples = samples,
transient = transient,
adaptNf = rep(ceiling(0.4*samples*thin),1),
nChains = nChains,
nParallel = nChains)
print(Sys.time()-t0)
save(m, file=hmsc_file)
}else{load(hmsc_file)}ess.beta <- effectiveSize(mpost$Beta) %>%
as_tibble() %>% dplyr::rename(ess_beta = value)
psrf.beta <- gelman.diag(mpost$Beta, multivariate=FALSE)$psrf%>%
as_tibble() %>% dplyr::rename(psrf_beta = `Point est.`)
diag_all <- ggarrange(ggplot(ess.beta, aes(x=ess_beta)) +
geom_histogram()+
xlab("Effective Sample Size"),
ggplot(psrf.beta, aes(x=psrf_beta)) +
geom_histogram()+
xlab("Gelman Diagnostic"),
align = "v") +ggtitle("All Plots")
ggsave(diag_all,filename = "figures/geldman_ess.pdf", width = 5.5, height=3.5, bg="white")
diag_allESS should be around 2000, Gelman <1.001
## [1] 0.2441953
data.frame(species = colnames(Y),TjurR2 = MF$TjurR2 ) %>%
dplyr::arrange(desc(TjurR2)) %>%
knitr::kable() %>%
kableExtra::kable_styling()| species | TjurR2 |
|---|---|
| SIAL2 | 0.6814319 |
| ARTRW8 | 0.5948806 |
| ELEL5 | 0.5558367 |
| LARA | 0.4825071 |
| CETE5 | 0.4285283 |
| ELCI2 | 0.3808974 |
| LEPE2 | 0.3602342 |
| CONVO | 0.3422957 |
| ALMI2 | 0.3279966 |
| TESP | 0.3190593 |
| ARAR8 | 0.3150509 |
| DESO | 0.3132418 |
| UNK9 | 0.2864504 |
| DEPI | 0.2637565 |
| CROC | 0.2627002 |
| UNK13 | 0.2566966 |
| PECTO | 0.2558896 |
| UNK14 | 0.2527971 |
| DEGL3 | 0.2514971 |
| PASM | 0.2493226 |
| GARA2 | 0.2486630 |
| AGCR | 0.2359842 |
| IVAX | 0.2288742 |
| LUP3 | 0.2283799 |
| ACMI2 | 0.2279602 |
| ERNA10 | 0.2241802 |
| ERCI6 | 0.2235801 |
| CYMOP2 | 0.2193784 |
| CADR | 0.2152466 |
| CABR4 | 0.2114770 |
| STPA4 | 0.2097289 |
| ELCA13 | 0.2016661 |
| AMIN3 | 0.1791749 |
| POSE | 0.1786637 |
| UNK22 | 0.1603949 |
| UNK24 | 0.1560609 |
| UNK1 | 0.1555945 |
| GRSP | 0.1538781 |
| ERTE18 | 0.1537362 |
| LAGL5 | 0.1535074 |
| ALLIU | 0.1515959 |
| ERUM | 0.1456572 |
| CHVI8 | 0.1390456 |
| CRYPT | 0.1008051 |
| LUAR3 | 0.0724877 |
| PEBO2 | 0.0716562 |
| PHDI3 | 0.0699489 |
| UNK8 | 0.0696865 |
| BAPR5 | -0.0025157 |
| BRTE | NaN |
| LUP1 | NaN |
# explanatory power
ggarrange(
ggplot(as.data.frame(MF),aes(x=(RMSE))) + geom_histogram(),
ggplot(as.data.frame(MF),aes(x=(TjurR2))) + geom_histogram(),
ggplot(as.data.frame(MF),aes(x=(AUC))) + geom_histogram())mf_df <- data.frame(Species = colnames(m$Y),
R2 = MF$TjurR2,
AUC = MF$AUC,
RMSE = MF$RMSE) %>%
left_join(prevalence)
mean(mf_df%>% filter(prevalence>7) %>% pull(R2), na.rm=T)## [1] 0.4324379
sbquants <- summary(mpost$Beta)$quantiles %>%
as_tibble(rownames = "variable") %>%
mutate(sign = `2.5%` * `97.5%`) %>%
filter(sign>0) %>%
separate(variable,
into = c("variable", "species"),
sep = ",") %>%
mutate(variable = str_sub(variable, 3,nchar(variable)-5),
species = str_sub(species, 2,nchar(species)-6) %>% trimws) %>%
filter(variable!= "(Intercept)") %>%
dplyr::select(variable,species,`2.5%`,`50%`,`97.5%`) %>%
arrange(variable)
vp_df <- VP$vals%>%
as_tibble(rownames = "variable") %>%
pivot_longer(cols=names(.)[2:ncol(.)],
names_to = "Species",
values_to = "value") %>%
left_join(prevalence) %>%
na.omit()
vp_summary <- vp_df %>%
group_by(variable) %>%
summarise(value = mean(value)) %>%
ungroup()
vp_order <- vp_df %>%
filter(variable == "FF") %>%
arrange(value) %>%
mutate(Species_f = factor(Species, levels = .$Species)) %>%
dplyr::select(Species, Species_f)
#
# vp_order <- vp_df %>% filter(variable == "Random: sample") %>%
# filter(origin=="I") %>%
# left_join(prevalence) %>%
# arrange(prevalence, origin) %>%
# mutate(Species_f = factor(Species, levels = .$Species)) %>%
# dplyr::select(Species, Species_f, origin) %>%
# rbind(vp_order_n)# %>%
# #left_join(mf_df)
vp <- left_join(vp_df, vp_order) %>%
mutate(variable = factor(variable, c( "Elevation",
"Random: site" ,
"folded_aspect",
"LITTER" ,
"AUM_acre",
"FF" )))%>%
ggplot(aes(x=value,y=Species_f, fill = variable)) +
geom_bar(stat="identity")+
theme_classic() +
ylab("Species") +
xlab("Proportion of Variance Explained") +
scale_fill_brewer(palette = "Dark2")+
theme(legend.position = "right",
legend.text = element_markdown(),
legend.title = element_blank(),
legend.justification = c(1,0),
legend.background = element_rect(color="black")) +
ggtitle("Variance Partitioning, Occurrence Model")
ggsave(vp, filename="figures/variance_partitioning_occurrence.png", height = 11.5, width = 9)
vpor, how are species responding to environmental drivers
postBeta <- getPostEstimate(m, parName = "Beta")
means <- postBeta$mean %>%
as_tibble() %>%
rowid_to_column("env_var") %>%
mutate(env_var =c("intercept", "Elevation","folded_aspect" ,"FF1", "FF2", "FF3", "Litter", "AUM_acre")) %>%
pivot_longer(cols=names(.)[2:ncol(.)], names_to = "Species", values_to = "Mean")
supported <- postBeta$support %>%
as_tibble() %>%
rowid_to_column("env_var") %>%
mutate(env_var = c("intercept", "Elevation","folded_aspect" ,"FF1", "FF2", "FF3", "Litter", "AUM_acre")) %>%
pivot_longer(cols=names(.)[2:ncol(.)],
names_to = "Species",
values_to = "Support") %>%
filter(Support >0.95|Support<0.05,
env_var != "intercept") %>%
left_join(means, by = c("env_var", "Species"))%>%
mutate(sign = ifelse(Mean>0, "+", "-"))#%>%
# left_join(vp_order_n)
p_beta<- ggplot(supported, aes(x=env_var,y=Species, fill = Mean, color = sign)) +
geom_tile(lwd=.5) +
theme_classic()+
scale_fill_steps2(mid = "grey90") +
scale_color_manual(values = c(("red"), ("blue"))) +
guides(color = "none")+
theme(axis.text.x = element_text(angle=45, vjust=1,hjust = 1),
axis.title = element_blank())
ggsave(p_beta,filename = "figures/betas_binomial.png", bg="white")
p_betaplotBeta(m, post = postBeta, param = "Support",
supportLevel = 0.95, split=.4, spNamesNumbers = c(T,F))gradient = constructGradient(m, focalVariable = "LITTER",
non.focalVariables = 1)
predY_LITTER = predict(m, XData=gradient$XDataNew, expected=TRUE,
studyDesign=gradient$studyDesignNew,
ranLevels=gradient$rLNew)
n_runs <- nChains*samples
pred_df <- do.call("rbind", predY_LITTER) %>%
as_tibble() %>%
mutate(LITTER = rep(gradient$XDataNew$LITTER,
n_runs),
run = rep(1:n_runs,each=20)) %>%
pivot_longer(values_to = "cover", names_to = "Species", -c(LITTER,run)) %>%
left_join(prevalence) %>%
filter(prevalence > 1) %>%
arrange(desc(prevalence)) %>%
mutate(Species_f = factor(Species, levels = unique(.$Species)))
grazing_native <- pred_df %>%
ggplot(aes(x=LITTER, y=cover)) +
geom_line(alpha = 0.03, aes(group=run), key_glyph="rect")+
# geom_line(data = pred_df_grazing,
# lwd=1, alpha=0.95, color="black", aes(y=mean))+
facet_wrap(~Species_f, nrow=4)+
xlab("Litter Cover (Percent)") +
ylab("Probability of Occurrence") +
guides(color=guide_legend(override.aes = list(alpha=1)))+
# scale_color_brewer(palette = "Dark2") +
# scale_color_manual(values = pal_nat)+
theme(legend.position = "right",
strip.text = element_markdown(),
panel.border = element_rect(fill=NA, size=0.75),
legend.justification = c(1,0),
legend.title = element_blank())
ggsave(grazing_native, filename = "figures/gradient_LITTER.png", width = 20, height = 10)
grazing_nativegradient = constructGradient(m, focalVariable = "FF")
predY_ff = predict(m, XData=gradient$XDataNew, expected=TRUE, studyDesign=gradient$studyDesignNew,
ranLevels=gradient$rLNew)
n_runs <- nChains*samples
pred_df_ff <- do.call("rbind", predY_ff) %>%
as_tibble() %>%
mutate(FF = rep(gradient$XDataNew$FF,
n_runs),
run = rep(1:n_runs, each=4)) %>%
pivot_longer(values_to = "cover", names_to = "Species", -c(FF,run)) %>%
left_join(prevalence) %>%
filter(prevalence > 1) %>%
arrange(desc(prevalence)) %>%
mutate(Species_f = factor(Species, levels = unique(.$Species)))
ff_native <- pred_df_ff %>%
ggplot(aes(x=FF, y=cover)) +
geom_jitter(alpha = 0.03, aes(group=run), key_glyph="rect")+
# geom_line(data = pred_df_grazing,
# lwd=1, alpha=0.95, color="black", aes(y=mean))+
facet_wrap(~Species_f, nrow=4)+
xlab("Fire Frequency") +
ylab("Probability of Occurrence") +
guides(color=guide_legend(override.aes = list(alpha=1)))+
# scale_color_brewer(palette = "Dark2") +
# scale_color_manual(values = pal_nat)+
theme(legend.position = "right",
strip.text = element_markdown(),
panel.border = element_rect(fill=NA, size=0.75),
legend.justification = c(1,0),
legend.title = element_blank())
ggsave(ff_native, filename = "figures/gradient_ff.png", width = 20, height = 10)
ff_nativeOmegaCor = computeAssociations(m)
supportLevel = 0.89
toPlot = ((OmegaCor[[1]]$support>supportLevel) +
(OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$mean
toPlot_p = ((OmegaCor[[1]]$support>supportLevel) +
(OmegaCor[[1]]$support<(1-supportLevel))>0)*OmegaCor[[1]]$support
hmdf_mean <- OmegaCor[[1]]$mean %>%
as.matrix
hmdf_support <- OmegaCor[[1]]$support %>%
as.matrix
omega_plot<- ggcorrplot::ggcorrplot(hmdf_mean, type = "lower",
hc.order = TRUE, title = "Occurrence")
ggsave(omega_plot,
filename="figures/species_associations.png",
bg="white", width=18, height = 18)
omega_plot